*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file for Nids Wave 1

* THIS IS 4th INCOME DO FILE - PERFORMING IMPUTATIONS FOR MISSING DATA: 4 OF 7
* THIS DO FILE PERFORMS IMPUTATIONS FOR MISSING DATA ON THE RELEVANT INCOME VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "W1 Income do file (1 of 7)"

*=====================================================================================================================================

* OPENING DATASET PREPARED IN PRIOR DO FILE "Income - Preparing variables for imputation (3 of 7).DO"

* Suppress the "more" pause so the do-file runs unattended
set more off

* The DataOUT global is defined in "W1 Income do file (1 of 7)"; stop with a clear
* message if it has not been set rather than failing on an unexpected path
if `"$DataOUT"' == "" {
	display as error "global DataOUT is not defined - run W1 Income do file (1 of 7) first"
	exit 198
}

* A forward slash is a valid path separator for Stata on every platform
* (a backslash only works on Windows)
use "$DataOUT/prepdata.dta", clear

*-------------------------------------------------------------------------------------------------------------------------------------

* Gross wages, step 1: predict log gross wages from log net wages with a
* bivariate regression (the slope is basically an elasticity)
generate templnfwag = lnfwag
regress lnfwag_g templnfwag
predict lnfwag_ghat_ni if working==1
generate fwag_ghat_ni = exp(lnfwag_ghat_ni)
* 1 = working, gross wage missing, prediction from net available; 0 otherwise
generate fwag_gimpute_ni = (working==1 & fwag_g==. & fwag_ghat_ni!=.)
replace fwag_g = fwag_ghat_ni if fwag_gimpute_ni==1
replace lnfwag_g = ln(fwag_g)

* Gross wages, step 2: regression prediction on the full covariate set
xi: regress lnfwag_g ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth
predict lnfwag_ghat
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnfwag_g ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional homerooms, ///
	gen(lnfwag_ghat_2)
generate fwag_gimpute_2 = (working==1 & lnfwag_ghat==. & lnfwag_g==. & lnfwag_ghat_2!=.)
replace lnfwag_ghat = lnfwag_ghat_2 if fwag_gimpute_2==1
generate fwag_ghat = exp(lnfwag_ghat)

* 1 = working, gross wage still missing, and a predicted value exists; 0 otherwise
generate fwag_gimpute = (working==1 & fwag_ghat!=. & fwag_g==.)
replace fwag_g = fwag_ghat if fwag_gimpute==1

* Summary flag for gross wages (conditions listed from highest to lowest priority)
generate fwag_g_flg = cond(fwag_gimpute_2==1, 5, ///
	cond(fwag_gimpute_ni==1, 4, ///
	cond(fwag_g==. & working==1, 3, ///
	cond(fwag_gimpute==1, 2, ///
	cond(fwag_g_d==1, 1, .)))))
label define fwag_g_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Imputed from net" 5 "Imputed using 'impute'"
label values fwag_g_flg fwag_g_flg
label variable fwag_g_flg "The gross wage data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

* IMPUTING FOR NET WAGES

* Net wages (point estimates), step 1: predict log net wages from log gross
* wages, using only gross wages flagged as survey-reported (fwag_g_flg==1);
* the slope is basically an elasticity
generate templnfwag_g = lnfwag_g if fwag_g_flg==1
regress lnfwag_p templnfwag_g
predict lnfwag_phat_gi if working==1
generate fwag_phat_gi = exp(lnfwag_phat_gi)
* 1 = working, net wage missing, prediction from gross available; 0 otherwise
generate fwag_pimpute_gi = (working==1 & fwag_p==. & fwag_phat_gi!=.)
replace fwag_p = fwag_phat_gi if fwag_pimpute_gi==1
replace lnfwag_p = ln(fwag_p)

* Net wages (point estimates), step 2: regression prediction on the full covariate set
xi: regress lnfwag_p ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth
predict lnfwag_phat if working==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnfwag_p ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional homerooms, ///
	gen(lnfwag_phat_2)
generate fwag_pimpute_2 = (working==1 & lnfwag_phat==. & lnfwag_p==. & lnfwag_phat_2!=.)
replace lnfwag_phat = lnfwag_phat_2 if fwag_pimpute_2==1
generate fwag_phat = exp(lnfwag_phat)
* 1 = working, net wage still missing, and a predicted value exists; 0 otherwise
generate fwag_pimpute = (working==1 & fwag_phat!=. & fwag_p==.)
replace fwag_p = fwag_phat if fwag_pimpute==1

* Summary flag for net wages (conditions listed from highest to lowest priority)
generate fwag_p_flg = cond(fwag_pimpute_2==1, 5, ///
	cond(fwag_pimpute_gi==1, 4, ///
	cond(fwag_p==. & working==1, 3, ///
	cond(fwag_pimpute==1, 2, ///
	cond(fwag_p_d==1, 1, .)))))
label define fwag_p_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Imputed from Gross" 5 "Imputed using 'impute'"
label values fwag_p_flg fwag_p_flg
label variable fwag_p_flg "The net wage data for this individual (point estimates only) is:"

*-------------------------------------------------------------------------------------------------------------------------------------

* IMPUTING FOR NET WAGES INCLUDING INCOME BRACKETS


* Net wages INCLUDING INCOME BRACKETS, step 1: predict from gross wages.
* Basically an elasticity of 0.876 - i.e. an increase in gross wages of 1% is
* associated with a 0.876% increase in net wages
regress lnfwag templnfwag_g
predict lnfwaghat_gi if working==1
generate fwaghat_gi = exp(lnfwaghat_gi)
* 1 = working, net wage missing, prediction from gross available; 0 otherwise
generate fwagimpute_gi = (working==1 & fwag==. & fwaghat_gi!=.)
replace fwag = fwaghat_gi if fwagimpute_gi==1
replace lnfwag = ln(fwag)

* Net wages INCLUDING INCOME BRACKETS, step 2: regression prediction
* (this specification also includes proxy)
xi: regress lnfwag ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lnfwaghat if working==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnfwag ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional homerooms, ///
	gen(lnfwaghat_2)
generate fwagimpute_2 = (working==1 & lnfwaghat==. & lnfwag==. & lnfwaghat_2!=.)
replace lnfwaghat = lnfwaghat_2 if fwagimpute_2==1
generate fwaghat = exp(lnfwaghat)
* 1 = working, net wage still missing, and a predicted value exists; 0 otherwise
generate fwagimpute = (working==1 & fwaghat!=. & fwag==.)
replace fwag = fwaghat if fwagimpute==1

* Summary flag for net wages INCLUDING INCOME BRACKETS
* (conditions listed from highest to lowest priority)
generate fwag_flg = cond(fwagimpute_2==1, 5, ///
	cond(fwagimpute_gi==1, 4, ///
	cond(fwag==. & working==1, 3, ///
	cond(fwagimpute==1, 2, ///
	cond(fwag_d==1, 1, .)))))
label define fwag_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Imputed from Gross" 5 "Imputed using 'impute'"
label values fwag_flg fwag_flg
label variable fwag_flg "The net wage data for this individual (INCLUDING INCOME BRACKETS) is:"

* The log wage variables and regression helpers are no longer needed
drop lnfwag_p lnfwag lnfwag_g templnfwag templnfwag_g

*-------------------------------------------------------------------------------------------------------------------------------------

* IMPUTING FOR 13TH CHEQUE


* 13th cheque: log survey-reported gross wage enters as a regressor; where it
* is unavailable it is set to 0 and templnfwag_g_d (1 = nonzero value) marks the difference
generate templnfwag_g = ln(fwag_g) if fwag_g_flg==1
replace templnfwag_g = 0 if templnfwag_g==.
generate templnfwag_g_d = (templnfwag_g!=0)
xi: regress lncheq_pa templnfwag_g templnfwag_g_d ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth
predict lncheq_pahat if cheq_pa_rec==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lncheq_pa ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional homerooms, ///
	gen(lncheq_pahat_2)
generate cheq_paimpute_2 = (cheq_pa_rec==1 & lncheq_pahat==. & lncheq_pa==. & lncheq_pahat_2!=.)
replace lncheq_pahat = lncheq_pahat_2 if cheq_paimpute_2==1
generate cheq_pahat = exp(lncheq_pahat)
* 1 = recipient with no reported amount but a predicted one; 0 otherwise
generate cheq_paimpute = (cheq_pa_rec==1 & cheq_pahat!=. & cheq_pa==.)
replace cheq_pa = cheq_pahat if cheq_paimpute==1
* Convert the annual amount to a monthly figure
replace cheq = cheq_pa/12
label variable cheq "Monthly income from 13th cheque (i.e. 1/12 of 13th cheque)"

* Summary flag for 13th cheque (conditions listed from highest to lowest priority)
generate cheq_flg = cond(cheq_paimpute_2==1, 5, ///
	cond(cheq_pa==. & cheq_pa_rec==1, 3, ///
	cond(cheq_paimpute==1, 2, ///
	cond(cheq_pa!=., 1, .))))
label define cheq_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values cheq_flg cheq_flg
label variable cheq_flg "The 13th cheque data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputing for profit share

* Dispersion check: coefficient of variation (sd / mean) of prof_pa
quietly summarize prof_pa, detail
generate prof_pa_cv = r(sd)/r(mean)
tabulate prof_pa_cv

* No values are imputed for profit share; the indicator is created all-missing
generate prof_paimpute = .

* Summary flag for profit share (conditions listed from highest to lowest priority)
generate prof_flg = cond(prof_pa==. & prof_pa_rec==1, 3, ///
	cond(prof_paimpute==1, 2, ///
	cond(prof_pa_d==1, 1, .)))
label define prof_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values prof_flg prof_flg
label variable prof_flg "The profit share data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputing for other bonus payments

* Other bonus payments: regression prediction; templnfwag_g and templnfwag_g_d
* must already exist when this section runs
xi: regress lnbonu_pa templnfwag_g templnfwag_g_d ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth
predict lnbonu_pahat if bonu_pa_rec==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnbonu_pa ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional homerooms, ///
	gen(lnbonu_pahat_2)
generate bonu_paimpute_2 = (bonu_pa_rec==1 & lnbonu_pahat==. & lnbonu_pa==. & lnbonu_pahat_2!=.)
replace lnbonu_pahat = lnbonu_pahat_2 if bonu_paimpute_2==1
generate bonu_pahat = exp(lnbonu_pahat)
* 1 = recipient with no reported amount but a predicted one; 0 otherwise
generate bonu_paimpute = (bonu_pa_rec==1 & bonu_pahat!=. & bonu_pa==.)
replace bonu_pa = bonu_pahat if bonu_paimpute==1
* Convert the annual amount to a monthly figure
replace bonu = bonu_pa/12

* Summary flag for other bonus (conditions listed from highest to lowest priority)
generate bonu_flg = cond(bonu_paimpute_2==1, 5, ///
	cond(bonu_pa==. & bonu_pa_rec==1, 3, ///
	cond(bonu_paimpute==1, 2, ///
	cond(bonu_pa!=., 1, .))))
label define bonu_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values bonu_flg bonu_flg
label variable bonu_flg "The other bonus data for this individual is:"

drop templnfwag_g

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputing for extra at piece rate

* Dispersion check: coefficient of variation (sd / mean) of extr
quietly summarize extr, detail
generate extr_cv = r(sd)/r(mean)
tabulate extr_cv

* No values are imputed for piece-rate extras; the indicator is created all-missing
generate extrimpute = .

* Summary flag for extra payments (conditions listed from highest to lowest priority)
generate extr_flg = cond(extr==. & extr_rec==1, 3, ///
	cond(extrimpute==1, 2, ///
	cond(extr_d==1, 1, .)))
label define extr_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing"
label values extr_flg extr_flg
label variable extr_flg "The extra payment data for this individual is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputing for casual job wages data


* Casual wages, point estimates ONLY: regression prediction for casual workers
xi: regress lncwag_p ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lncwag_phat if cworking==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lncwag_p ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lncwag_phat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate cwag_pimpute_2 = 1 if cworking==1 & lncwag_phat==. & lncwag_p==. & lncwag_phat_2!=.
replace lncwag_phat = lncwag_phat_2 if cwag_pimpute_2==1
generate cwag_phat = exp(lncwag_phat)

* 1 = no reported casual wage but a predicted one exists; 0 otherwise
generate cwag_pimpute = (cwag_phat!=. & cwag_p==.)
replace cwag_p = cwag_phat if cwag_pimpute==1

* Casual wages including income brackets: regression prediction
xi: regress lncwag ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lncwaghat if cworking==1
* Fallback for the bracket-inclusive series. The dependent variable is lncwag:
* it previously read lncwag_p (copied from the point-estimate section), so the
* fallback values were built from the wrong series
xi: impute lncwag ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lncwaghat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate cwagimpute_2 = 1 if cworking==1 & lncwaghat==. & lncwag==. & lncwaghat_2!=.
replace lncwaghat = lncwaghat_2 if cwagimpute_2==1
generate cwaghat = exp(lncwaghat)

* 1 = no reported casual wage but a predicted one exists; 0 otherwise
generate cwagimpute = (cwaghat!=. & cwag==.)
replace cwag = cwaghat if cwagimpute==1

* Summary flag for casual wages, point estimates only
* (conditions listed from highest to lowest priority)
generate cwag_p_flg = cond(cwag_pimpute_2==1, 5, ///
	cond(cwag_p==. & cworking==1, 3, ///
	cond(cwag_pimpute==1, 2, ///
	cond(cwag_p_d==1, 1, .))))
label define cwag_p_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values cwag_p_flg cwag_p_flg
label variable cwag_p_flg "The casual wages data for this individual (point estimates only) is:"

* Summary flag for casual wages including brackets
* (conditions listed from highest to lowest priority)
generate cwag_flg = cond(cwagimpute_2==1, 5, ///
	cond(cwag==. & cworking==1, 3, ///
	cond(cwagimpute==1, 2, ///
	cond(cwag_d==1, 1, .))))
label define cwag_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values cwag_flg cwag_flg
label variable cwag_flg "The casual wage data for this individual (including brackets & proxies) is:"



*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for self employment


* Self-employment earnings, point estimates ONLY: regression prediction
* (tradeunion is not part of the self-employment specifications)
xi: regress lnswag_p ///
	male race_d coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lnswag_phat if sworking==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnswag_p ///
	male coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lnswag_phat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate swag_pimpute_2 = 1 if sworking==1 & lnswag_phat==. & lnswag_p==. & lnswag_phat_2!=.
replace lnswag_phat = lnswag_phat_2 if swag_pimpute_2==1
generate swag_phat = exp(lnswag_phat)

* 1 = no reported earnings but a predicted value exists; 0 otherwise
generate swag_pimpute = (swag_phat!=. & swag_p==.)
replace swag_p = swag_phat if swag_pimpute==1

* Self-employment earnings including income brackets: regression prediction
xi: regress lnswag ///
	male race_d coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lnswaghat if sworking==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnswag ///
	male coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lnswaghat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate swagimpute_2 = 1 if sworking==1 & lnswaghat==. & lnswag==. & lnswaghat_2!=.
replace lnswaghat = lnswaghat_2 if swagimpute_2==1
generate swaghat = exp(lnswaghat)

* 1 = no reported earnings but a predicted value exists; 0 otherwise
generate swagimpute = (swaghat!=. & swag==.)
replace swag = swaghat if swagimpute==1

* Summary flag for self-employment earnings, point estimates only
* (conditions listed from highest to lowest priority)
generate swag_p_flg = cond(swag_pimpute_2==1, 5, ///
	cond(swag_p==. & sworking==1, 3, ///
	cond(swag_pimpute==1, 2, ///
	cond(swag_p_d==1, 1, .))))
* The 'impute' label is attached to code 5, the code actually assigned above;
* it was previously defined on code 4, which is never assigned, leaving 5 unlabelled
label define swag_p_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values swag_p_flg swag_p_flg
label variable swag_p_flg "The self-employment earnings data for this individual (point estimates only) is:"

* Summary flag for self-employment earnings including brackets
* (conditions listed from highest to lowest priority)
generate swag_flg = cond(swagimpute_2==1, 5, ///
	cond(swag==. & sworking==1, 3, ///
	cond(swagimpute==1, 2, ///
	cond(swag_d==1, 1, .))))
label define swag_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values swag_flg swag_flg
label variable swag_flg "The self-employment earnings data for this individual (including brackets & proxies) is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for help friends business

* Dispersion check: coefficient of variation (sd / mean) of help
quietly summarize help, detail
generate hf_cv = r(sd)/r(mean)
tabulate hf_cv

* No values are imputed for this income source; the indicator is created all-missing
generate helpimpute = .

* Summary flag for help-friend-business earnings
* (conditions listed from highest to lowest priority)
generate help_flg = cond(help==. & help_rec==1, 3, ///
	cond(helpimpute==1, 2, ///
	cond(help_d==1, 1, .)))
label define help_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values help_flg help_flg
label variable help_flg "The earning from helping a friend data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for state (RSA) old age pension


* Recipients who give no figure for their old-age pension are assigned a fixed
* amount chosen by interview month:
*   months 1-4, or month missing (.) : 870
*   months 5-10                      : 940
*   later months                     : 960
generate spenhat = cond(intmonth<=4 | intmonth==., 870, ///
	cond(intmonth<=10, 940, 960)) if spen_rec==1
* 1 = assigned amount available and no reported amount; 0 otherwise
generate spenimpute = (spenhat!=. & spen==.)
replace spen = spenhat if spenimpute==1

* Summary flag for state (RSA) old age pension (imputed takes priority over survey)
generate spen_flg = cond(spenimpute==1, 2, cond(spen_d==1, 1, .))
label define spen_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Unit imputed"
label values spen_flg spen_flg
label variable spen_flg "The old age state (RSA) pension data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for private pension or foreign pension variables

* Private or foreign pension: regression prediction for recipients
xi: regress lnppen ///
	male race_d coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lnppenhat if ppen_rec==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnppen ///
	male coloured asian_indian white ///
	age age_d agesq tradeunion ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lnppenhat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate ppenimpute_2 = 1 if ppen_rec==1 & lnppenhat==. & lnppen==. & lnppenhat_2!=.
replace lnppenhat = lnppenhat_2 if ppenimpute_2==1
generate ppenhat = exp(lnppenhat)

* 1 = no reported amount but a predicted one exists; 0 otherwise
generate ppenimpute = (ppenhat!=. & ppen==.)
replace ppen = ppenhat if ppenimpute==1

* Summary flag for private/foreign pension
* (conditions listed from highest to lowest priority)
generate ppen_flg = cond(ppenimpute_2==1, 5, ///
	cond(ppen==. & ppen_rec==1, 3, ///
	cond(ppenimpute==1, 2, ///
	cond(ppen_d==1, 1, .))))
label define ppen_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'"
label values ppen_flg ppen_flg
label variable ppen_flg "The private pension and foriegn pension payment data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for UIF income variables


* Dispersion check: coefficient of variation (sd / mean) of uif
quietly summarize uif, detail
generate uif_cv = r(sd)/r(mean)
tabulate uif_cv

* No values are imputed for UIF payments; the indicator is created all-missing
generate uifimpute = .

* Summary flag for UIF income (conditions listed from highest to lowest priority)
generate uif_flg = cond(uif==. & uif_rec==1, 3, ///
	cond(uifimpute==1, 2, ///
	cond(uif_d==1, 1, .)))
label define uif_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values uif_flg uif_flg
label variable uif_flg "The UIF income data for this individual is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for workmens compensation variables


* Dispersion check: coefficient of variation (sd / mean) of comp
quietly summarize comp, detail
generate comp_cv = r(sd)/r(mean)
tabulate comp_cv

* No values are imputed for workers compensation; the indicator is created all-missing
generate compimpute = .

* Summary flag for compensation income (conditions listed from highest to lowest priority)
generate comp_flg = cond(comp==. & comp_rec==1, 3, ///
	cond(compimpute==1, 2, ///
	cond(comp_d==1, 1, .)))
label define comp_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values comp_flg comp_flg
label variable comp_flg "The workmens compensation income data for this individual is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for disability grant variables

* Recipients who give no figure for their disability grant are assigned a
* fixed amount chosen by interview month:
*   months 1-4, or month missing (.) : 870
*   months 5-10                      : 940
*   later months                     : 960
generate dishat = cond(intmonth<=4 | intmonth==., 870, ///
	cond(intmonth<=10, 940, 960)) if dis_rec==1
* 1 = assigned amount available and no reported amount; 0 otherwise
generate disimpute = (dishat!=. & dis==.)
replace dis = dishat if disimpute==1

* Summary flag for disability grant (conditions listed from highest to lowest priority)
generate dis_flg = cond(dis==. & dis_rec==1, 3, ///
	cond(disimpute==1, 2, ///
	cond(dis_d==1, 1, .)))
label define dis_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values dis_flg dis_flg
label variable dis_flg "The disability grant data for this individual is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for child support grant variables


* Recipients who give no figure for their child support grant are assigned a
* per-child amount, chosen by interview month, multiplied by biochildren:
*   months 1-4, or month missing (.) : 210
*   months 5-10                      : 220
*   later months                     : 230
generate chldhat = biochildren * cond(intmonth<=4 | intmonth==., 210, ///
	cond(intmonth<=10, 220, 230)) if chld_rec==1
* 1 = assigned amount available and no reported amount; 0 otherwise
generate chldimpute = (chldhat!=. & chld==.)
replace chld = chldhat if chldimpute==1
* These people report no biological children and give no number for the child
* grant, probably an error - CHECK!!! Their receipt indicator is switched off.
replace chld_rec = 0 if chld==. & biochildren==. & biochild==0

* Summary flag for child support grant (conditions listed from highest to lowest priority)
generate chld_flg = cond(chld==. & chld_rec==1, 3, ///
	cond(chldimpute==1, 2, ///
	cond(chld_d==1, 1, .)))
label define chld_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Unit imputed"
label values chld_flg chld_flg
label variable chld_flg "The child support grant data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for foster care grant variables


* Recipients who give no figure for their foster care grant are assigned a
* fixed amount chosen by interview month (the amount is NOT multiplied by a
* number of children in this section):
*   months 1-4, or month missing (.) : 610
*   months 5-10                      : 650
*   later months                     : 680
generate fosthat = cond(intmonth<=4 | intmonth==., 610, ///
	cond(intmonth<=10, 650, 680)) if fost_rec==1
* 1 = assigned amount available and no reported amount; 0 otherwise
generate fostimpute = (fosthat!=. & fost==.)
replace fost = fosthat if fostimpute==1

* Summary flag for foster care grant (conditions listed from highest to lowest priority)
generate fost_flg = cond(fost==. & fost_rec==1, 3, ///
	cond(fostimpute==1, 2, ///
	cond(fost_d==1, 1, .)))
label define fost_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values fost_flg fost_flg
label variable fost_flg "The foster care grant data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for care dependency grant variables


* Recipients who give no figure for their care dependency grant are assigned a
* fixed amount chosen by interview month:
*   months 1-4, or month missing (.) : 870
*   months 5-10                      : 940
*   later months                     : 960
generate carehat = cond(intmonth<=4 | intmonth==., 870, ///
	cond(intmonth<=10, 940, 960)) if care_rec==1
* 1 = assigned amount available and no reported amount; 0 otherwise
generate careimpute = (carehat!=. & care==.)
replace care = carehat if careimpute==1

* Summary flag for care dependency grant (conditions listed from highest to lowest priority)
generate care_flg = cond(care==. & care_rec==1, 3, ///
	cond(careimpute==1, 2, ///
	cond(care_d==1, 1, .)))
label define care_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values care_flg care_flg
label variable care_flg "The care dependency grant data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for interest/dividend income variables


* Dispersion check: coefficient of variation (sd / mean) of indi
quietly summarize indi, detail
generate intdiv_cv = r(sd)/r(mean)
tabulate intdiv_cv

* Alternative imputation: recipients with no reported amount are given the
* median (r(p50)) of the reported amounts
summarize indi, detail
return list
generate temp = r(p50)
* Flag the cases first, then fill them (flag is 1 or missing, never 0)
generate indiimpute = 1 if indi==. & indi_rec==1
replace indi = temp if indiimpute==1
drop temp

* Summary flag for interest/dividend income
* (conditions listed from highest to lowest priority)
generate indi_flg = cond(indi==. & indi_rec==1, 3, ///
	cond(indiimpute==1, 2, ///
	cond(indi_d==1, 1, .)))
label define indi_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values indi_flg indi_flg
label variable indi_flg "The interest/dividend data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for inheritance variables


* Dispersion check: coefficient of variation (sd / mean) of inhe
quietly summarize inhe, detail
generate inherit_cv = r(sd)/r(mean)
tabulate inherit_cv

* No values are imputed for inheritance payments; the indicator is created all-missing
generate inheimpute = .

* Summary flag for inheritance payment (conditions listed from highest to lowest priority)
generate inhe_flg = cond(inhe==. & inhe_rec==1, 3, ///
	cond(inheimpute==1, 2, ///
	cond(inhe_d==1, 1, .)))
label define inhe_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values inhe_flg inhe_flg
label variable inhe_flg "The inheritance payment data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for rental income

* Dispersion check: coefficient of variation (sd / mean) of rnt
quietly summarize rnt, detail
generate rnt_cv = r(sd)/r(mean)
tabulate rnt_cv

* Rental income: regression prediction for recipients
* (tradeunion is not part of the rental specifications)
xi: regress lnrnt ///
	male race_d coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional ///
	homerooms homeroomssq homerooms_d i.intmonth proxy
predict lnrnthat if rnt_rec==1
* Fallback: 'impute' on a reduced covariate list, used only where the
* regression prediction above is missing
xi: impute lnrnt ///
	male coloured asian_indian white ///
	age age_d agesq ///
	easterncape northerncape freestate kwazulunatal northwest gauteng mpumalanga limpopo ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad ///
	roster_married farm traditional proxy, ///
	gen(lnrnthat_2)
* 1 where the 'impute' value is used; left missing (not 0) elsewhere
generate rntimpute_2 = 1 if rnt_rec==1 & lnrnthat==. & lnrnt==. & lnrnthat_2!=.
replace lnrnthat = lnrnthat_2 if rntimpute_2==1
generate rnthat = exp(lnrnthat)

* 1 = no reported rent but a predicted value exists; 0 otherwise
generate rntimpute = (rnthat!=. & rnt==.)
replace rnt = rnthat if rntimpute==1

* Summary flag for rental income (conditions listed from highest to lowest priority).
* NOTE(review): 'impute' is coded 4 here, whereas the other flags visible in this
* file use 5 - confirm whether later do files rely on 4 before harmonising
generate rnt_flg = cond(rntimpute_2==1, 4, ///
	cond(rnt==. & rnt_rec==1, 3, ///
	cond(rntimpute==1, 2, ///
	cond(rnt_d==1, 1, .))))
label define rnt_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing"  4 "Imputed using 'impute'"
label values rnt_flg rnt_flg
label variable rnt_flg "The rental income data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for Retrenchment payment


* Dispersion check: coefficient of variation (sd / mean) of retr
quietly summarize retr, detail
generate retrench_cv = r(sd)/r(mean)
tabulate retrench_cv

* No values are imputed for retrenchment payments; the indicator is created all-missing
generate retrimpute = .

* Summary flag for retrenchment payment (conditions listed from highest to lowest priority)
generate retr_flg = cond(retr==. & retr_rec==1, 3, ///
	cond(retrimpute==1, 2, ///
	cond(retr_d==1, 1, .)))
label define retr_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values retr_flg retr_flg
label variable retr_flg "The retrenchment payment data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for lobola/bride wealth payments variables

*Coefficient of variation (sd/mean of brid)
quietly summarize brid, detail
generate bride_cv = r(sd)/r(mean)
tabulate bride_cv

*Imputation of income from lobola/bride wealth payment
*No model is estimated here: the indicator is created as all-missing
generate bridimpute = .

*Imputation summary variable for lobola/bride wealth payment
*(with bridimpute all-missing, value 2 is never assigned)
label define brid_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
generate brid_flg = 1 if brid_d==1
replace brid_flg = 2 if bridimpute==1
replace brid_flg = 3 if brid==. & brid_rec==1
label values brid_flg brid_flg
label variable brid_flg "The lobola/bride wealth payment data for this individual is:"


*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for gift income variables

*Coefficient of variation (sd/mean of gift)
quietly summarize gift, detail
generate gift_cv = r(sd)/r(mean)
tabulate gift_cv

*Imputation of income from gifts
*No model is estimated here: the indicator is created as all-missing
generate giftimpute = .

*Imputation summary variable for gift payment
*(with giftimpute all-missing, value 2 is never assigned)
label define gift_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
generate gift_flg = 1 if gift_d==1
replace gift_flg = 2 if giftimpute==1
replace gift_flg = 3 if gift==. & gift_rec==1
label values gift_flg gift_flg
label variable gift_flg "The gift payment data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for repayment of loans to you variables

*Coefficient of variation (sd/mean of loan)
quietly summarize loan, detail
generate loan_cv = r(sd)/r(mean)
tabulate loan_cv

*Imputation of income from loan repayment
*No model is estimated here: the indicator is created as all-missing
generate loanimpute = .

*Imputation summary variable for loan repayment income
*(with loanimpute all-missing, value 2 is never assigned)
label define loan_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
generate loan_flg = 1 if loan_d==1
replace loan_flg = 2 if loanimpute==1
replace loan_flg = 3 if loan==. & loan_rec==1
label values loan_flg loan_flg
label variable loan_flg "The loan repayment income data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for Sale of household goods variables

*Coefficient of variation (sd/mean of sale)
quietly summarize sale, detail
generate sale_cv = r(sd)/r(mean)
tabulate sale_cv

*Imputation of income from the sale of household goods
*No model is estimated here: the indicator is created as all-missing
generate saleimpute = .

*Imputation summary variable for the sale of household goods income
*(with saleimpute all-missing, value 2 is never assigned)
label define sale_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
generate sale_flg = 1 if sale_d==1
replace sale_flg = 2 if saleimpute==1
replace sale_flg = 3 if sale==. & sale_rec==1
label values sale_flg sale_flg
label variable sale_flg "The sale of household goods data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for 'other' income variables

*No imputation is performed for 'other' income: every non-missing amount is
*flagged as survey data and missing amounts are left unflagged.

*Imputation summary variable for other income
gen othe_flg=1 if othe!=.
label variable othe_flg "The other income data for this individual is:"
label define othe_flg 1 "Survey"
label values othe_flg othe_flg

*Label for the income amount itself. This text was previously applied to
*othe_flg, which overwrote the flag's own label set just above.
*NOTE(review): confirm that othe is the intended target and that replacing any
*label it already carries is acceptable.
label variable othe "Monthly income from 'other' sources"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations for total remittance income variables

*Imputation summary variable for total remittance income
*(no imputation is performed: every non-missing amount is flagged as survey data)
label define remt_flg 1 "Survey"
generate remt_flg = 1 if remt!=.
label values remt_flg remt_flg
label variable remt_flg "The total remittance income data for this individual is:"

*-------------------------------------------------------------------------------------------------------------------------------------

***Imputations of household total income figure

*Overview: household-level quantities are estimated and flagged on ONE row per
*household - the first row of each w1_hhid after sorting by w1_hhid pid, selected
*with "w1_hhid!=w1_hhid[_n-1]" - and are then copied to every member at the end.
*The sort order must therefore not change between the sort and the statements
*that use [_n-1].

*Imputation
sort w1_hhid pid
*Step 1: OLS on log household income, one observation per household
xi: regress lnhhq_incb i.province i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d farm traditional hhedu hhedusq proxy i.intmonth hhage hhage_d hhtu if w1_hhid!=w1_hhid[_n-1]
predict lnhhq_incbhat if w1_hhid!=w1_hhid[_n-1]
gen hhq_incbhat=exp(lnhhq_incbhat)
gen hhq_incbimpute=1 if hhq_incbhat!=. & hhq_incb==. & w1_hhid!=w1_hhid[_n-1]
*Precedence note: & is evaluated before |, so the condition below reads
*  hhq_incbhat==. | (hhq_incb!=. & first row of household)
*Rows that are not the first of their household have no prediction and so are set to 0.
replace hhq_incbimpute=0 if hhq_incbhat==. | hhq_incb!=. & w1_hhid!=w1_hhid[_n-1]
replace hhq_incb=hhq_incbhat if hhq_incbimpute==1
*Step 2: -impute- on a shorter covariate list, applied where income is still missing
xi: impute lnhhq_incb i.province homerooms i.hhrace hhrace_d farm traditional hhedu hhedusq proxy hhage hhage_d hhtu if w1_hhid!=w1_hhid[_n-1], gen(lnhhq_incbhat2)
gen hhq_incbimpute2=1 if hhq_incb==. & lnhhq_incbhat2!=.
replace hhq_incb=exp(lnhhq_incbhat2) if hhq_incbimpute2==1

*Imputation summary variable for household total income figure
*Later assignments override earlier ones. The -impute- category is coded 5 here;
*value 4 is not used by this flag.
*NOTE(review): hhq_incb_d is presumably an indicator for a reported survey value - confirm in do file 3.
sort w1_hhid pid
gen hhq_incb_flg=1 if hhq_incb_d==1 & w1_hhid!=w1_hhid[_n-1]
replace hhq_incb_flg=2 if hhq_incbimpute==1 & w1_hhid!=w1_hhid[_n-1]
replace hhq_incb_flg=3 if hhq_incb==. & w1_hhid!=w1_hhid[_n-1]
replace hhq_incb_flg=5 if hhq_incbimpute2==1
label variable hhq_incb_flg "The household income as per household questionnaire data for this individual is:"
label define hhq_incb_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 5 "Imputed using 'impute'", modify
label values hhq_incb_flg hhq_incb_flg

*Setting the value for all members of the household
*Each household-level value is spread to all rows of the household through a
*by-household max (amounts, flag, prediction) or sum (indicators).
egen hhq_incbtemp=max(hhq_incb), by(w1_hhid)
replace hhq_incb=hhq_incbtemp if hhq_incb==.
egen hhq_incb_dtemp=sum(hhq_incb_d), by(w1_hhid)
replace hhq_incb_d=hhq_incb_dtemp
egen hhq_incbimputetemp=sum(hhq_incbimpute), by(w1_hhid)
replace hhq_incbimpute=hhq_incbimputetemp
egen hhq_incb_flgtemp=max(hhq_incb_flg), by(w1_hhid)
replace hhq_incb_flg=hhq_incb_flgtemp
egen hhq_incbhattemp=max(hhq_incbhat), by(w1_hhid)
replace hhq_incbhat=hhq_incbhattemp if hhq_incbhat==.
*Removes every variable whose name ends in "temp", not only the helpers created above
drop *temp

*-------------------------------------------------------------------------------------------------------------------------------------

***Implied rental income


***Rent expense - done here for comparison purposes with implied rentals

*Estimation and flagging are done on one row per household (the first row of
*each w1_hhid after the sort); values are copied to all members at the end.

*Imputation
sort w1_hhid pid
xi: regress lnrent i.province i.hometype i.homewalls i.homeroof homerooms homeroomssq homerooms_d i.hhrace farm traditional hhedu hhedusq i.intmonth hhage hhage_d hhtu if w1_hhid!=w1_hhid[_n-1]
*Predictions are kept only for households with homestatus==1
predict lnrenthat if w1_hhid!=w1_hhid[_n-1] & homestatus==1
gen renthat=exp(lnrenthat)
gen rentimpute=1 if renthat!=. & rent==. & w1_hhid!=w1_hhid[_n-1]
*& is evaluated before |: this reads lnrenthat==. | (rent!=. & first row of household)
replace rentimpute=0 if lnrenthat==. | rent!=. & w1_hhid!=w1_hhid[_n-1]
replace rent=renthat if rentimpute==1

*Imputation summary
*rent already contains the imputed values here, so value 1 is assigned first and
*then overridden by 2 for imputed households.
sort w1_hhid pid
gen rent_flg=1 if rent!=. & homestatus==1 & w1_hhid!=w1_hhid[_n-1]
replace rent_flg=2 if rentimpute==1 & homestatus==1 & w1_hhid!=w1_hhid[_n-1]
replace rent_flg=3 if rent==. & homestatus==1 & w1_hhid!=w1_hhid[_n-1]
*The label previously repeated the household-income text used for hhq_incb_flg;
*it now describes the rent expense that this flag refers to.
label variable rent_flg "The household rent expense data for this individual is:"
label define rent_flg 1 "Survey" 2 "Imputed" 3 "Not imputed", modify
label values rent_flg rent_flg
replace lnrent=ln(rent)

*Setting the value for all members of the household
egen renttemp=max(rent), by(w1_hhid)
replace rent=renttemp
egen rent_flgtemp=max(rent_flg), by(w1_hhid)
replace rent_flg=rent_flgtemp
*Removes every variable whose name ends in "temp"
drop *temp



***Don't own don't rent

*Estimation and flagging are done on one row per household (the first row of
*each w1_hhid after the sort); values are copied to all members at the end.

*Imputation
sort w1_hhid pid
*Set the category that -xi- omits for these two factors. The characteristics
*persist, so they also apply to every later -xi- call that uses these variables.
char homewalls [omit] 6
char homeroof [omit] 1
xi: regress lnrent_would i.province i.hometype i.homewalls i.homeroof homerooms homeroomssq homerooms_d i.hhrace farm traditional hhedu hhedusq i.intmonth hhage hhage_d hhtu if w1_hhid!=w1_hhid[_n-1]
*Predictions are kept only for households with homestatus==4
predict lnrent_wouldhat if w1_hhid!=w1_hhid[_n-1] & homestatus==4 
gen rent_wouldhat=exp(lnrent_wouldhat)
gen rent_wouldimpute=1 if rent_wouldhat!=. & rent_would==. & w1_hhid!=w1_hhid[_n-1]
*& is evaluated before |: this reads lnrent_wouldhat==. | (rent_would!=. & first row of household)
replace rent_wouldimpute=0 if lnrent_wouldhat==. | rent_would!=. & w1_hhid!=w1_hhid[_n-1]
replace rent_would=rent_wouldhat if rent_wouldimpute==1

*Imputation summary
*rent_would already contains the imputed values here, so value 1 is assigned
*first and then overridden by 2 for imputed households.
sort w1_hhid pid
gen rent_would_flg=1 if rent_would!=. & homestatus==4 & w1_hhid!=w1_hhid[_n-1]
replace rent_would_flg=2 if rent_wouldimpute==1 & homestatus==4 & w1_hhid!=w1_hhid[_n-1]
replace rent_would_flg=3 if rent_would==. & homestatus==4 & w1_hhid!=w1_hhid[_n-1]
*The label previously repeated the household-income text used for hhq_incb_flg;
*it now names the quantity that this flag refers to.
label variable rent_would_flg "The implied rent (neither owns nor rents) data for this individual is:"
label define rent_would_flg 1 "Survey" 2 "Imputed" 3 "Not imputed", modify
label values rent_would_flg rent_would_flg
replace lnrent_would=ln(rent_would)

*Setting the value for all members of the household
egen rent_wouldtemp=max(rent_would), by(w1_hhid)
replace rent_would=rent_wouldtemp
egen rent_would_flgtemp=max(rent_would_flg), by(w1_hhid)
replace rent_would_flg=rent_would_flgtemp
*Removes every variable whose name ends in "temp"
drop *temp


***Homeowners

*Estimation and flagging are done on one row per household (the first row of
*each w1_hhid after the sort); values are copied to all members at the end.

*Imputation for homeowners (mortgage and non-mortgage)
sort w1_hhid pid
xi: regress lnrent_could i.province i.hometype i.homewalls i.homeroof homerooms homeroomssq homerooms_d i.hhrace farm traditional hhedu hhedusq i.intmonth hhage hhage_d hhtu mortgage if w1_hhid!=w1_hhid[_n-1]
*Predictions are kept only for households with homestatus 2 or 3
predict lnrent_couldhat if w1_hhid!=w1_hhid[_n-1] & (homestatus==2 | homestatus==3) 
gen rent_couldhat=exp(lnrent_couldhat)
gen rent_couldimpute=1 if rent_couldhat!=. & rent_could==. & w1_hhid!=w1_hhid[_n-1]
*& is evaluated before |: this reads lnrent_couldhat==. | (rent_could!=. & first row of household)
replace rent_couldimpute=0 if lnrent_couldhat==. | rent_could!=. & w1_hhid!=w1_hhid[_n-1]
replace rent_could=rent_couldhat if rent_couldimpute==1

*Imputation summary variable for homeowners, implied rental income
*rent_could already contains the imputed values here, so value 1 is assigned
*first and then overridden by 2 for imputed households.
sort w1_hhid pid
gen rent_could_flg=1 if rent_could!=. & (homestatus==2 | homestatus==3) & w1_hhid!=w1_hhid[_n-1]
replace rent_could_flg=2 if rent_couldimpute==1 & (homestatus==2 | homestatus==3) & w1_hhid!=w1_hhid[_n-1]
replace rent_could_flg=3 if rent_could==. & (homestatus==2 | homestatus==3) & w1_hhid!=w1_hhid[_n-1]
*The label previously repeated the household-income text used for hhq_incb_flg;
*it now names the quantity that this flag refers to.
label variable rent_could_flg "The implied rent (homeowners) data for this individual is:"
label define rent_could_flg 1 "Survey" 2 "Imputed" 3 "Not imputed", modify
label values rent_could_flg rent_could_flg
replace lnrent_could=ln(rent_could)

*Setting the value for all members of the household
egen rent_couldtemp=max(rent_could), by(w1_hhid)
replace rent_could=rent_couldtemp
egen rent_could_flgtemp=max(rent_could_flg), by(w1_hhid)
replace rent_could_flg=rent_could_flgtemp
*Removes every variable whose name ends in "temp"
drop *temp


***Aggregating

*Aggregating into implied rental income
*imprent takes rent_would for homestatus==4 and rent_could for homestatus 2 or 3;
*it stays missing for every other homestatus at this point.
gen imprent=.
replace imprent=rent_would if homestatus==4
replace imprent=rent_could if homestatus==2 | homestatus==3
*The flag is carried over from the matching component flag
gen imprent_flg=.
replace imprent_flg=rent_would_flg if homestatus==4
replace imprent_flg=rent_could_flg if homestatus==2 | homestatus==3
replace imprent_flg=2 if imprent_flg==. & homestatus!=1 /*these are the guys whose housing situation is unknown*/
*NOTE(review): the component flags can take the value 3 ("Not imputed"), which
*has no entry in this value label - confirm whether one should be added.
label define imprent_flg 1 "Survey" 2 "Imputed", modify
label values imprent_flg imprent_flg
label variable imprent "Household monthly implied rental income"
*The closing double quote was missing from this label string
label variable imprent_flg "The monthly household implied rental income data is from:"

***Imputing for those whose housing situation remains missing

*Model log implied rent on one row per household (first row of each w1_hhid
*after the sort) and keep predictions only where homestatus==5
sort w1_hhid pid
generate lnimprent = ln(imprent)
xi: regress lnimprent i.province i.hometype i.homewalls i.homeroof ///
    homerooms homeroomssq homerooms_d i.hhrace farm traditional hhedu hhedusq ///
    i.intmonth hhage hhage_d hhtu mortgage if w1_hhid!=w1_hhid[_n-1]
predict lnimprenthat if w1_hhid!=w1_hhid[_n-1] & homestatus==5
generate imprenthat = exp(lnimprenthat)

*Indicator: 1 where a prediction fills a missing imprent on the household's first row
generate imprentimpute = 1 if imprenthat!=. & imprent==. & w1_hhid!=w1_hhid[_n-1]
*Parentheses spell out the evaluation order (& before |)
replace imprentimpute = 0 if lnimprenthat==. | (imprent!=. & w1_hhid!=w1_hhid[_n-1])
replace imprent = imprenthat if imprentimpute==1

*Setting the value for all members of the household (homestatus==5 rows only)
egen imprenttemp = max(imprent) if homestatus==5, by(w1_hhid)
replace imprent = imprenttemp if homestatus==5
*Removes every variable whose name ends in "temp"
drop *temp

*Write the imputed dataset to disk, overwriting any existing copy
save "$DataOUT\impdata.dta", replace

* end of do file
*========================================================================================================================================
